Opg. 1

Gradienten er

\(\begin{bmatrix} \frac{1}{n} \sum_i^n 2(a + bs_i - d_i) \\ \frac{1}{n} \sum_i^n 2(a + bs_i -d_i)s_i \end{bmatrix}\)

ab <- c(-20,3)
n <- 50
ms <- function(ab) {
  ab[1] + ab[2] * cars$speed
}

f_i <- function(ab) {
  (ms(ab) - cars$dist)^2
}

f <- function(ab) {
  1/n * sum(f_i(ab))
}

d_f <- function(ab) {
  grad(f,ab)
}

dd_f <- function(ab) {
  hessian(f,ab)
}

backtrack <- function(a_bar = 3, rho = 0.3, c = 0.2, func = f, Dfunc = d_f, x_k = ab, c_2 = 0.5) {
  a <- a_bar
  itt <- 0
  while ( (func(x_k + a * (-Dfunc(x_k)) ) ) > (func(x_k) + c * a * t(Dfunc(x_k)) %*% (-Dfunc(x_k))) ) {
  # while (t(Dfunc(x_k + a * -Dfunc(x_k))) %*% -Dfunc(x_k) < c_2 * t(Dfunc(x_k)) %*% -Dfunc(x_k) ) {
    itt <- itt + 1
    a <- rho * a
  # }
  }
  a
}
#params
c1 <- 0.001
c2 <- 0.9
a_lo <- 0.001
a_hi <- 0.08
a_0 <- 0.1
# Implementation of Algorithm 3.6
# (Zoom)
zoom <- function(x_k, a_lo, a_hi, c1, c2, func = f, g = d_f) {
    f_k <- func(x_k)
    g_k <- g(x_k)
    p_k <- -g_k
    
    k <- 0
    k_max <- 1000   # Maximum number of iterations.
    done <- FALSE
    
    while(!done) {
        k <- k + 1
        phi_lo <- func(x_k + a_lo*p_k)

        a_k <- 0.5*(a_lo + a_hi)
        phi_k <- func(x_k + a_k*p_k)
        dphi_k_0 <- g_k%*%p_k
        l_k <- f_k + c1*a_k*dphi_k_0
        
        if ((phi_k > l_k) | (phi_k >= phi_lo)) {
            a_hi <- a_k
        } else {
            dphi_k <- p_k %*% g(x_k + a_k*p_k)
            
            if (abs(dphi_k) <= -c2*dphi_k_0) {
              return(a_k)
            }
            
            if (dphi_k*(a_hi - a_lo) >= 0) {
                a_hi <- a_lo
            }
            
            a_lo <- a_k
        }
        
        done <- (k > k_max)
    }
    
    return(a_k)
}

alpha <- function(a_0, x_k, c1, c2, func = f, g = d_f) {
  a_max <- 4*a_0 # Maximum step length. Can also be given as argument.
  f_k <- func(x_k)
  phi_k <- f_k
  a_1 <- a_0
  a0 <- 0
  a_k <- a_1
  a_k_old <- a0
  
  k <- 0
  k_max <- 1000   # Maximum number of iterations.
  done <- FALSE
  while(!done) {
    k <- k + 1
    f_k <- func(x_k)
    g_k <- g(x_k)
    p_k <- -g_k
    
    phi_k_old <- phi_k
    phi_k <- func(x_k + a_k*p_k)
    dphi_k_0 <- g_k%*%p_k
    l_k <- f_k + c1*a_k*dphi_k_0
    
    if ((phi_k > l_k) || ((k > 1) && (phi_k >= phi_k_old))) {
      return(zoom(x_k, a_k_old, a_k, c1, c2))
    }
    
    dphi_k <- p_k %*% g(x_k + a_k*p_k)
    
    if (abs(dphi_k) <= -c2*dphi_k_0) {
      return(a_k)
    }
    
    if (dphi_k >= 0) {
      return(zoom(a_k, a_k_old, x_k, c1, c2))
    }
    
    a_k_old <- a_k
    a_k <- rho*a_k + (1 - rho)*a_max # e.g. rho <- 0.5
    done <- (k > k_max)
  }
  
  return(a_k)
}
#steepest descent
optimer_identitet_backtrack <- function(x_0 = ab) {
  x_k <- x_0
  a_k <- 2
  itt <- 0
  steps <- c()
  plot(dist ~ speed, cars)
  while (norm(t(d_f(x_k)), type = "2") > 1e-5) {
    itt <- 1 + itt
    p_k <- -d_f(x_k)
    a_k <- backtrack(a_k, rho = 0.5, c = 0.001, f, d_f, x_k)
    x_k <- x_k + a_k * t(p_k)
    steps[itt] <- a_k
    if (itt %% 500 == 0) {
      abline(x_k)
    }
    if (itt == 100000) {
      break
    }
  }
  cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
}
# optimer_identitet_backtrack()
optimer_identitet_zoom <- function(x_0 = ab) {
  x_k <- x_0
  a_k <- 2
  itt <- 0
  steps <- c()
  plot(dist ~ speed, cars)
  while (norm(t(d_f(x_k)), type = "2") > 1e-5) {
    itt <- 1 + itt
    p_k <- -d_f(x_k)
    a_k <- alpha(a_0, x_k, c1,c2)
    x_k <- x_k + a_k * t(p_k)
    steps[itt] <- a_k
    if (itt %% 500 == 0) {
      abline(x_k)
    }
    if (itt == 100000) {
      break
    }
  }
  cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
}
# optimer_identitet_zoom()
#newton
optimer_newton_backtrack <- function(x_0 = ab, output = TRUE) {
  x_k <- x_0
  a_k <- 2
  itt <- 0
  steps <- c()
  x_plot <- c()
  y_plot <- c()
  plot(dist ~ speed, cars)
  while (norm(t(d_f(x_k)), type = "2") > 1e-4) {
    itt <- 1 + itt
    p_k <- solve(dd_f(x_k), -d_f(x_k))
    a_k <- backtrack(a_k, rho = 0.5, c = 0.01, f, d_f, x_k)
    x_k <- x_k + a_k * t(p_k)
    if (itt %% 50 == 0) {
      abline(x_k)
      x_plot[itt/50] <- x_k[1]
      y_plot[itt/50] <- x_k[2]
    }
    steps[itt] <- a_k
    if (itt == 100000) {
      break
    }
  }
  if (output == TRUE) {
    cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
  }
  else
  cbind(x_plot, y_plot)
}
xy_backtrack <- optimer_newton_backtrack(output = FALSE)
optimer_newton_backtrack(output = TRUE)

## x* =  -17.5791 3.932409 f(x*) = 227.0704 iteration = 7956 steps =  0.001953125 0.001953125 0.001953125 0.001953125 0.001953125
#newton
optimer_newton_zoom <- function(x_0 = ab, output = TRUE) {
  x_k <- x_0
  a_k <- 2
  itt <- 0
  steps <- c()
  x_plot <- c()
  y_plot <- c()
  plot(dist ~ speed, cars)
  while (norm(t(d_f(x_k)), type = "2") > 1e-4) {
    itt <- 1 + itt
    p_k <- solve(dd_f(x_k), -d_f(x_k))
    a_k <- alpha(a_0, x_k, c1,c2)
    x_k <- x_k + a_k * t(p_k)
    if (itt %% 50 == 0) {
      abline(x_k)
      x_plot[itt/50] <- x_k[1]
      y_plot[itt/50] <- x_k[2]
    }
    steps[itt] <- a_k
    if (itt == 100000) {
      break
    }
  }
  if (output == TRUE) {
    cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
  }
  else
  cbind(x_plot, y_plot)
}
xy_zoom <- optimer_newton_zoom(output = FALSE)
optimer_newton_zoom(output = TRUE)

## x* =  -17.5791 3.932409 f(x*) = 227.0704 iteration = 4970 steps =  0.003125 0.003125 0.003125 0.003125 0.003125
benchmark_result <- microbenchmark(optimer_identitet_backtrack(), optimer_identitet_zoom(), optimer_newton_backtrack(), optimer_newton_zoom(), times = 1)

## x* =  -17.5791 3.932409 f(x*) = 227.0704 iteration = 4970 steps =  0.003125 0.003125 0.003125 0.003125 0.003125

## x* =  -17.5791 3.932409 f(x*) = 227.0704 iteration = 7956 steps =  0.001953125 0.001953125 0.001953125 0.001953125 0.001953125

## x* =  -17.57914 3.932412 f(x*) = 227.0704 iteration = 26768 steps =  0.001953125 0.001953125 0.001953125 0.001953125 0.001953125

## x* =  -17.57913 3.932411 f(x*) = 227.0704 iteration = 12869 steps =  0.003125 0.003125 0.003125 0.003125 0.003125
ms_plot <- function(x,y) {
  x + y * cars$speed
}

f_i_plot <- function(x,y) {
  (ms_plot(x,y) - cars$dist)^2
}

f_plot <- function(x,y) {
  1/n * sum(f_i_plot(x,y))
}


fkts_vaerdier_backtrack <- apply(xy_backtrack, 1, f)
# x <- seq(-20,-17,length = 200)
# y <- seq(2.9,4.2, length = 200)
x <- xy_backtrack[,1]
y <- xy_backtrack[,2]
z <- outer(x, y, FUN = Vectorize("f_plot"))
plot_ly(x = x, y = y, z = ~z) %>% add_surface()  %>% 
  add_trace(x = xy_backtrack[,1], y = xy_backtrack[,2], z = fkts_vaerdier_backtrack)
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
fkts_vaerdier_zoom <- apply(xy_zoom, 1, f)
# x <- seq(-20,-17,length = 200)
# y <- seq(2.9,4.2, length = 200)
x <- xy_zoom[,1]
y <- xy_zoom[,2]
z <- outer(x, y, FUN = Vectorize("f_plot"))
plot_ly(x = x, y = y, z = ~z) %>% add_surface()  %>% 
  add_trace(x = xy_zoom[,1], y = xy_zoom[,2], z = fkts_vaerdier_zoom)
## No trace type specified:
##   Based on info supplied, a 'scatter3d' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
## No scatter3d mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
summary(lm(cars$dist ~ cars$speed))
## 
## Call:
## lm(formula = cars$dist ~ cars$speed)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.069  -9.525  -2.272   9.215  43.201 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -17.5791     6.7584  -2.601   0.0123 *  
## cars$speed    3.9324     0.4155   9.464 1.49e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared:  0.6511, Adjusted R-squared:  0.6438 
## F-statistic: 89.57 on 1 and 48 DF,  p-value: 1.49e-12
optim(ab, f, hessian = TRUE)
## $par
## [1] -17.583175   3.932699
## 
## $value
## [1] 227.0704
## 
## $counts
## function gradient 
##       53       NA 
## 
## $convergence
## [1] 0
## 
## $message
## NULL
## 
## $hessian
##      [,1]   [,2]
## [1,]  2.0  30.80
## [2,] 30.8 529.12
phi_lig_nul <- function(x_k) {
  1/n * sum((x_k[2] * cars$speed + x_k[1] - cars$dist) /(d_f(x_k)[2] * cars$speed + d_f(x_k)[1]))
}

optimer_identitet_phi <- function(x_0 = ab) {
  x_k <- x_0
  a_k <- 2
  itt <- 0
  steps <- c()
  plot(dist ~ speed, cars)
  while (norm(t(d_f(x_k)), type = "2") > 1e-5) {
    itt <- 1 + itt
    p_k <- -d_f(x_k)
    a_k <- phi_lig_nul(x_k)
    x_k <- x_k + a_k * t(p_k)
    steps[itt] <- a_k
    if (itt %% 1 == 0) {
      abline(x_k)
    }
    if (itt == 100000) {
      break
    }
  }
  cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
}
# optimer_identitet_phi()


optimer_newton_phi <- function(x_0 = ab) {
  x_k <- x_0
  a_k <- 2
  itt <- 0
  steps <- c()
  x_plot <- c()
  f_plot <- c()
  plot(dist ~ speed, cars)
  while (norm(t(d_f(x_k)), type = "2") > 1e-5) {
    itt <- 1 + itt
    p_k <- solve(dd_f(x_k), -d_f(x_k))
    a_k <- phi_lig_nul(x_k)
    x_k <- x_k + a_k * t(p_k)
    if (itt %% 50 == 0) {
      abline(x_k)
    }
    steps[itt] <- a_k
    if (itt == 100000) {
      break
    }
  }
  cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
}
# optimer_newton_phi()